* Housekeeping: reset the session, start a fresh text log, disable paging
clear all
capture log close
log using ${rep_root}/logs/first_stage_fig.log, text replace
set more off

* Run parameters
*   cutoff  - case-count threshold; copied into case_lb
*   seed    - seed handed to boottest in the first-stage section
*   case_lb - investigators with fewer cases than this are dropped
local cutoff 50
local seed 9999
local case_lb `cutoff'

* Control sets, expanded as varlists where they are used.
* covarsmin is not referenced anywhere in the visible part of this file.
local covarsmin d_mo_* d_dist_*
local covars vic_female_any vic_white_any acc_white acc_male acc_old cat_3 cat_4 d_mo_* d_dist_* arrests_pre complaints_pre force_pre any_prior_complaint tenure
 
 
* ---------------------------------------------------------------
* Bring in case status + complaints data and apply the data restriction
* (all of that happens inside data_restrict.do)
* ---------------------------------------------------------------
do ${rep_root}/code/data_restrict.do

* ---------------------------------------------------------------
* Merge with outcomes data on complaint id and accused id,
* keeping matched observations only and no _merge variable
* ---------------------------------------------------------------
merge 1:1 cr_id acc_id using ${rep_root}/data/outcomes, keep(match) nogenerate


*****************
**DEFINE TREATMENT AND MAKE COVARIATES
***************** 
 

**Drop investigators without enough cases
* Tabulate investigators before the cut, count rows per investigator,
* drop investigators below the case_lb threshold, then cross-tabulate
* the remaining investigators by intake year
tabulate inv_id
bysort inv_id: generate inv_ct = _N
drop if inv_ct < `case_lb'
tabulate inv_id intake_yr

**Define some covariates
* All of these are 0/1 indicators built from relational expressions.
* Stata orders missing values above every number, so x>=35 is TRUE when x
* is missing, while x<=1 is FALSE. The explicit missing() guards below keep
* missing inputs from being coded as 1.
gen po = acc_rank == "POLICE OFFICER"
* A missing vic_inj_1 is coded 0 here because missing is not <=1
gen minor_inj = vic_inj_1<=1
* Victim aged 35 or more in the complaint year. Both the complaint date and
* the birth year must be present: a missing date makes the difference
* missing, which would otherwise pass the >=35 test.
gen vic_old = (year(inc_complaint_dt_1) - vic_byr_oldest)>=35 & !missing(vic_byr_oldest, inc_complaint_dt_1)
gen acc_old = acc_age>=35 & !missing(acc_age)
gen cat_3 = acc_cat==3
gen cat_4 = acc_cat==4
 
   
  

* District indicators, one per district code 1 through 25
forvalues d = 1/25 {
	generate d_dist_`d' = (inc_district_1 == `d')
}

* Intake-year indicators and intake-month-by-year indicators for the two
* year ranges 2006-2008 and 2014-2018. Variables are created in the same
* order as two separate year loops would create them.
foreach y of numlist 2006/2008 2014/2018 {
	generate d_yr_`y' = (intake_yr == `y')

	forvalues mo = 1/12 {
		generate d_mo_yr_`mo'_`y' = (intake_yr == `y') & (intake_mo == `mo')
	}
}

* One indicator per value of the finding code
generate d_noaff   = (finding_cd == "NO AFFIDAVIT")
generate d_unfound = (finding_cd == "UNFOUNDED")
generate d_nosus   = (finding_cd == "NOT SUSTAINED")
generate d_exon    = (finding_cd == "EXONERATED")
generate d_sus     = (finding_cd == "SUSTAINED")
generate d_addlinv = (finding_cd == "ADDITIONAL INVESTIGATION REQUESTED")

* Store the mean and SD of each listed variable, formatted to three
* decimals, in locals named ss_<variable>_m and ss_<variable>_sd
local ss_vars vic_female_any vic_white_any acc_white acc_male acc_old cat_3 cat_4 arrests_pre complaints_pre force_pre any_prior_complaint tenure arrests_postc1_3 serious_postc1_3 complaints_postc1_3 arrests_postc4_12 serious_postc4_12 complaints_postc4_12 arrests_posty2 serious_posty2 complaints_posty2 d_noaff d_unfound d_nosus d_exon d_sus d_addlinv
foreach v of local ss_vars {
	quietly summarize `v'
	local ss_`v'_m : display %6.3f r(mean)
	local ss_`v'_sd : display %6.3f r(sd)
}

* Diagnostics: rows per value of mid. Mark one row per mid, then report
* how many distinct values there are and the distribution of rows per value.
* The helper variables are dropped afterwards.
bysort mid: generate mid_ct = _N
by mid: generate mid_mark = (_n == 1)
tabulate mid_mark
summarize mid_ct if mid_mark == 1
tabulate mid_ct if mid_mark == 1
drop mid_ct mid_mark

* Same diagnostics for rows per complaint id
bysort cr_id: generate cr_ct = _N
by cr_id: generate cr_mark = (_n == 1)
tabulate cr_mark
summarize cr_ct if cr_mark == 1
tabulate cr_ct if cr_mark == 1
drop cr_ct cr_mark

**Define treatment
* treat_affidavit is 1 for every finding other than NO AFFIDAVIT
* NOTE(review): an empty finding_cd would also be coded 1 - confirm intended
generate treat_affidavit = (finding_cd != "NO AFFIDAVIT")
* treat_sustain is 1 only for SUSTAINED findings
generate treat_sustain = (finding_cd == "SUSTAINED")

* Flag for intake years 2006 through 2009
* (not referenced again in the visible part of this file)
generate era = inrange(intake_yr, 2006, 2009)

* Build the leave-out mean (LOM) of the residualized treatment, once per
* treatment. For each row the LOM is the investigator-by-intake-year sum of
* residuals minus the sum for the row's own complaint, divided by the
* matching difference in row counts. The result is stored in lom_<treatment>.
foreach t in treat_affidavit treat_sustain {

	* Remove helper variables left by the previous pass. On the first pass
	* none exist and the captured error is ignored.
	capture drop resid cr_resid cr_ct inv_tot_resid case inv_ct2

	* Residualize the treatment on the control set
	quietly regress `t' `covars'
	predict resid, residuals

	* Complaint-level residual total and row count
	* NOTE(review): total() treats a missing residual as zero while _N counts
	* every row - confirm no rows drop out of the regression above
	bysort cr_id: egen cr_resid = total(resid)
	by cr_id: generate cr_ct = _N

	* Investigator-by-intake-year residual total and row count
	generate case = 1
	bysort inv_id intake_yr: egen inv_tot_resid = total(resid)
	by inv_id intake_yr: egen inv_ct2 = total(case)

	* Leave the own complaint out of both numerator and denominator.
	* Cells holding a single complaint give 0/0 and so a missing LOM.
	generate lom_`t' = (inv_tot_resid - cr_resid) / (inv_ct2 - cr_ct)

	* Report the LOM separately for each investigator
	bysort inv_id: summarize lom_`t'
}
 
*****************
**STORE FIRST STAGE
*****************


**First stage
* For each treatment: regress the treatment on its leave-out mean and the
* controls with standard errors clustered by investigator, then run a wild
* bootstrap test (Webb weights) on the LOM coefficient with boottest.
* Results are kept as formatted locals suffixed with the treatment name,
* plus a global first_star_<treatment> holding the significance stars.
foreach t in treat_affidavit treat_sustain{

	sum `t'
	local first_mean = r(mean)

	reg `t' lom_`t' `covars', cluster(inv_id)
	local first = _b[lom_`t']
	local first_se = _se[lom_`t']
	local first_N = e(N)
	boottest lom_`t', weight(webb) nograph seed(`seed')
	* Squared bootstrap t statistic, stored as the first-stage F
	local first_f = r(t)^2
	local first_p = r(p)

	local first : di %6.3f `first'
	local first_f : di %6.3f `first_f'

	local first_mean_`t' : di %6.3f `first_mean'
	local first_`t' : di %6.3f `first'
	local first_se_`t' : di %6.3f `first_se'
	local first_p_`t' : di %6.3f `first_p'
	* Assign stars from the unrounded p-value. Testing the value rounded to
	* three decimals would push p=0.0096 (shown as 0.010) down to two stars
	* and p=0.0496 (shown as 0.050) down to one.
	glo first_star_`t' = cond(`first_p' <0.01, "***", cond(`first_p' <0.05, "**", cond(`first_p' <0.1, "*", "")))
	local first_f_`t' : di %6.3f `first_f'
	local first_N_`t' : di %6.0f `first_N'

}

* Keep the analysis dataset, including both LOM variables, for the figures
tempfile main
save `main'

****************************
**INVESTIGATOR LEVEL STATS**
****************************
* Mean affidavit LOM and row count within each investigator-year cell
bysort inv_id intake_yr: egen inv_yr_lom = mean(lom_treat_affidavit)
by inv_id intake_yr: generate inv_yr_ct = _N

* Reduce to one row per investigator-year
by inv_id intake_yr: drop if _n > 1

* Per investigator: number of years present, and the mean and SD of the
* yearly LOM across those years
by inv_id: generate inv_yrs = _N
by inv_id: egen inv_lom = mean(inv_yr_lom)
by inv_id: egen within_inv_sd = sd(inv_yr_lom)

summarize inv_yr_lom within_inv_sd inv_yr_ct, detail

* Time-series figure of the yearly affidavit LOM for selected investigators,
* overlaid with avg_continuity from the dyad_continuity file on a second axis.
* Runs inside preserve/restore so the investigator-year data are untouched.
preserve
keep inv_id intake_yr inv_yr_lom
* One row per intake year, one inv_yr_lom<id> column per investigator
reshape wide inv_yr_lom, i(intake_yr) j(inv_id)

* Add one empty row for each year between the two sample periods so the
* lines break across the gap (cmissing(n)) instead of joining 2008 to 2014.
local last_early 2008
local first_late 2014
local n_copies = `first_late' - `last_early'
expand `n_copies' if intake_yr==`last_early', gen(_exp)
* Number the added rows with a running count of _exp rather than _n, so the
* gap years do not depend on how many year rows the reshaped data has.
replace intake_yr = `last_early' + sum(_exp) if _exp==1
* Blank every investigator column in the added rows
foreach v of varlist inv_yr_lom* {
	replace `v' = . if _exp==1
}
sort intake_yr
merge 1:1 intake_yr using "${rep_root}/data/dyad_continuity", keep(1 3) nogen
* Line plots connect points in data order, so make the order explicit
sort intake_yr

twoway ///
(line inv_yr_lom5 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom6 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom7 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom10 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom12 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom14 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom15 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom18 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom20 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom24 intake_yr, cmissing(n) yaxis(1)) ///
(line inv_yr_lom28 intake_yr, cmissing(n) yaxis(1)) ///
(line avg_continuity intake_yr, cmissing(n) lcolor(black) lpattern(dot) yaxis(2)) ///
, ///
legend(order (1 "Inv. 5" 2 "Inv. 6" 3 "Inv. 7" 4 "Inv. 10" 5 "Inv. 12" 6 "Inv. 14" 7 "Inv. 15" 8 "Inv. 18" 9 "Inv. 20" 10 "Inv. 24" 11 "Inv. 28" 12 "Sup-Inv Dyad Continuity")) ///
xtitle("Year") ytitle("Residualized Investigator Affidavit Rate", axis(1)) ytitle("Sup-Inv Dyad Continuity", axis(2))
graph export "${rep_root}/output/figures/investigator_lom_ts.png", replace
restore

* Reduce to one row per investigator, compute the SD of the investigator
* mean LOM across investigators, list the investigator-level figures, and
* summarize the within-investigator SD weighted by investigator case count
by inv_id: drop if _n > 1
egen between_inv_sd = sd(inv_lom)
list inv_id inv_ct inv_yrs inv_lom within_inv_sd between_inv_sd
summarize within_inv_sd [fweight = inv_ct]

* Empty memory before the saved analysis data are reloaded
clear

*******************************************************************************
* FIGURE  DISTRIBUTION OF JUDGE INSTRUMENT
*******************************************************************************

use `main'

* Partial out the controls: regress the treatment on the LOM and controls,
* then add the fitted LOM component and the constant back to the residual
quietly regress treat_affidavit lom_treat_affidavit `covars'
predict resid_treat_affidavit, residuals
generate resid1 = resid_treat_affidavit + _b[lom_treat_affidavit]*lom_treat_affidavit + _b[_cons]

* 2nd and 98th percentiles of the LOM, kept as scalars; the figure is
* restricted to this range
_pctile lom_treat_affidavit, nquantiles(100)
scalar pl_lom_treat_affidavit = r(r2)
scalar pu_lom_treat_affidavit = r(r98)

* Average within investigator-year cells and keep one row per cell, with
* the cell size in ct
bysort inv_id intake_yr: egen resid1_mean = mean(resid1)
by inv_id intake_yr: egen lom_mean = mean(lom_treat_affidavit)
by inv_id intake_yr: generate ct = _N
by inv_id intake_yr: drop if _n > 1

* Kernel-weighted local polynomial smooth of the cell-mean residualized
* treatment on the cell-mean LOM, weighted by cell size. degree(0) requests
* a local-mean smooth, not a local-linear one. The smooth is evaluated at
* 100 grid points with bandwidth 0.04; grid, fit and standard error are
* written to fs_x, fs_y and se, and no graph is drawn.
lpoly resid1_mean lom_mean [aweight = ct], degree(0) bwidth(0.04) n(100) generate(fs_x fs_y) se(se) nograph

* Keep only the smoothed series, dropping rows beyond the grid
keep fs_x fs_y se
drop if missing(fs_x)
sort fs_x

tempfile locallinear
save `locallinear'

**** load full dataset and create figure
* Stack the observation-level LOM (for the histogram) on top of the smoothed
* first-stage series (for the fitted line and its confidence band)
use `main', clear
keep lom_treat_affidavit

append using `locallinear'


* Pointwise 95 percent band around the smooth
gen upper = fs_y + 1.96*se
gen lower = fs_y - 1.96*se
display scalar(pu_lom_treat_affidavit)
display scalar(pl_lom_treat_affidavit)

* Refer to the percentile cutoffs through scalar() so that a variable whose
* name matches or abbreviates to the scalar name can never be used instead
local lo scalar(pl_lom_treat_affidavit)
local hi scalar(pu_lom_treat_affidavit)

* Histogram of the LOM on axis 1; smooth and band on axis 2. Everything is
* restricted to the 2nd-98th percentile range of the LOM.
twoway hist lom_treat_affidavit if inrange(lom_treat_affidavit,`lo',`hi'), width(.02) frac fcolor(gs10) lcolor(white) yaxis(1)  ///
	|| line fs_y fs_x if inrange(fs_x,`lo',`hi') , lc(black) lw(.6) yaxis(2) ///
	|| line upper fs_x if inrange(fs_x,`lo',`hi'), lc(gs8) lw(.3) yaxis(2) lp(dash) ///
	|| line lower fs_x if inrange(fs_x,`lo',`hi'), lc(gs8) lw(.3) yaxis(2) lp(dash) ///
	title("", size(large) color(black))  ///
	ytitle("Fraction of Sample", size(medlarge) axis(1))  ///
	ytitle("Residualized Rate of Affidavit Acquisition", size(medlarge) axis(2)) ///
	xtitle("Supervisor Affidavit Acquisition Tendency", size(medium)) ///
	legend(off) ///
	ylabel(0(.05).2 , nogrid axis(1)) ///
	ylabel(0.2(.4)1.4, nogrid axis(2)) ///
	xlabel(-0.45(.15)0.3 , nogrid) ///
	graphregion(color(white)) bgcolor(white)

graph export "${rep_root}/output/figures/first_stage_fig.png", replace